* Post-secondary fields
* Load the field-of-study records; any data currently in memory is discarded.
use "Data\field_of_study.dta", clear


* Build the broad field categorisation (ps_field) from educ_field_narrow.
* Most groups are identified by the first two characters of the code; the
* "04" group is split on the full code: "041" -> Business, "042" (law) is
* grouped with Social Science.
gen ps_field = "Education and Humanities" if inlist(substr(educ_field_narrow, 1, 2), "01", "02")
replace ps_field = "Social Science" if substr(educ_field_narrow, 1, 2) == "03" | educ_field_narrow == "042"
replace ps_field = "Business" if educ_field_narrow == "041"
replace ps_field = "STEM" if inlist(substr(educ_field_narrow, 1, 2), "05", "06", "07")
replace ps_field = "Health and Welfare" if substr(educ_field_narrow, 1, 2) == "09"

* Everything not classified above falls into "Other"
* (includes agriculture, forestry, fisheries, veterinary, services, security, and unknown).
replace ps_field = "Other" if missing(ps_field)


* Inspect the distribution of the broad fields.
tab ps_field


* Merge on the population sample to create the combined categorization.
* pnr + aar must uniquely identify observations in both files (1:1 merge).
sort pnr aar

merge 1:1 pnr aar using "Data\kmeans ambition types.dta", keepusing(wage_growth_ambition koen couple_id fined)

* Observations that exist only in the ambition-type file have no field record,
* so ps_field is empty for them after the merge (the "Other" fallback was
* applied before merging). Re-apply the fallback so that unknown fields are
* treated as "Other" for these observations as well; otherwise they would be
* left without an education level/field category further down.
replace ps_field = "Other" if missing(ps_field)


* Keep only those for whom we observe the ambition type.
* missing() also excludes extended missing values (.a-.z), which "!=." would keep.
keep if !missing(wage_growth_ambition)

* Keep only couples where we observe both partners.
* A numeric copy of koen is averaged within couple_id x aar; a mean of exactly
* 1.5 is used as the signal that both partners are present.
* NOTE(review): this presumes koen is coded 1/2 with one partner of each value
* per couple -- confirm against the data documentation.
* Temporary variables avoid name clashes with existing variables.
tempvar koen_num koen_mean
gen `koen_num' = koen
destring `koen_num', replace
bysort couple_id aar: egen `koen_mean' = mean(`koen_num')
keep if `koen_mean' == 1.5
drop `koen_num' `koen_mean'

* Restore the person-year ordering changed by the bysort above.
sort pnr aar

* Combined education level / field variable:
*   fined == 1      -> "Primary"
*   fined == 2      -> "Secondary"
*   fined == 3 or 4 -> the broad post-secondary field stored in ps_field
gen educ_level_field = "Primary" if fined == 1
replace educ_level_field = "Secondary" if fined == 2
foreach fld in "Education and Humanities" "Social Science" "Business" "STEM" "Health and Welfare" "Other" {
    replace educ_level_field = "`fld'" if inlist(fined, 3, 4) & ps_field == "`fld'"
}

tab educ_level_field

* Numeric version of the same categories, coded 1-8 in the order listed below.
* Observations without a category stay missing.
gen educ_level_field_num = .
local code = 0
foreach lvl in "Primary" "Secondary" "Education and Humanities" "Social Science" "Business" "STEM" "Health and Welfare" "Other" {
    local ++code
    replace educ_level_field_num = `code' if educ_level_field == "`lvl'"
}

tab educ_level_field_num

* The merge indicator is not needed in the saved file.
drop _merge

* Save
save "Data\post_secondary_fields_full_population.dta", replace
